# read in galapagos flora data gala<-read.table('ecol 563/galapagos.txt', header=T) names(gala) # fit normal model in area out.norm1 <- lm(Species~Area, data=gala) plot(Species~Area, data=gala) abline(out.norm1, lty=2) # fit normal model in log area out.norm2 <- lm(Species~log(Area), data=gala) plot(Species~log(Area), data=gala) abline(out.norm2, lty=2) # plot residuals to assess structural form of the predictor plot(residuals(out.norm2)~log(Area), data=gala) abline(h=0, lty=2) # add a nonparametric smoother lines(lowess(residuals(out.norm2)~log(gala$Area)), col=2) # fit nonlinear normal model out.norm3 <- nls(Species~a*Area^b, data=gala, start=list(a=1,b=.5)) summary(out.norm3) plot(Species~Area, data=gala) coef(out.norm3) curve(coef(out.norm3)[1]*x^coef(out.norm3)[2], add=T, col=2) # compare models fit so far AIC(out.norm1, out.norm2, out.norm3) # fit Poisson model out.pois<-glm(Species~log(Area), data=gala, family=poisson) # fit NB-2 model library(MASS) out.NB2<-glm.nb(Species~log(Area), data=gala) library(gamlss) # fit NB-1 model out.NB1<-gamlss(Species~log(Area), data=gala, family=NBII) # NB-2 model from gamlss for comparison out.NB1a<-gamlss(Species~log(Area), data=gala, family=NBI) # compare count models AIC(out.pois, out.NB2, out.NB1, out.NB1a) # plot the count models plot(Species~log(Area), data=gala) curve(exp(coef(out.NB1)[1]+coef(out.NB1)[2]*x), add=T, lty=2) curve(exp(coef(out.pois)[1]+coef(out.pois)[2]*x), add=T, col=4) curve(exp(coef(out.NB2)[1]+coef(out.NB2)[2]*x), add=T, col=2) # fit lognormal model out.lognorm <- lm(log(Species)~log(Area), data=gala) plot(log(Species)~log(Area), data=gala) abline(out.lognorm, lty=2) # the AIC of this model is not comparable to the rest AIC(out.lognorm) # function to calculate correct AIC for comparisons with count models lognorm.LL <- function(y,model,c=0) { sigma2 <- sum(residuals(model)^2)/length(residuals(model)) prob <- ifelse(y==0, pnorm(log(y+c+0.5), mean=predict(model), sd=sqrt(sigma2)), pnorm(log(y+c+0.5), mean=predict(model), sd=sqrt(sigma2))-pnorm(log(y+c-0.5), mean=predict(model), sd=sqrt(sigma2))) LL <- sum(log(prob)) AIC <- -2*LL + 2*(length(coef(model))+1) out <- data.frame(LL=LL, AIC=AIC) print(out, row.names=F) } # calculate AIC and LL lognorm.LL(gala$Species,out.lognorm) # compare with previous models AIC(out.norm1, out.norm2, out.norm3, out.pois, out.NB2, out.NB1) # fit square root normal model out.sqrtnorm <- lm(sqrt(Species)~log(Area), data=gala) # function to calculate correct AIC and LL for comparison with count models sqrtnorm.LL <- function(y,model,c=0) { sigma2 <- sum(residuals(model)^2)/length(residuals(model)) prob <- ifelse(y==0, pnorm(sqrt(y+c+0.5), mean=predict(model), sd=sqrt(sigma2)), pnorm(sqrt(y+c+0.5), mean=predict(model), sd=sqrt(sigma2)) -pnorm(sqrt(y+c-0.5), mean=predict(model), sd=sqrt(sigma2))) LL <- sum(log(prob)) AIC <- -2*LL + 2*(length(coef(model))+1) out <- data.frame(LL=LL, AIC=AIC) print(out, row.names=F) } # obtain AIC and compare with remaining models sqrtnorm.LL(gala$Species,out.sqrtnorm) AIC(out.norm1,out.norm2,out.norm3,out.pois,out.NB2,out.NB1) lognorm.LL(gala$Species,out.lognorm) # demonstrating that mean(log(x)) < log(mean(x)) curve(log(x),from=.8,to=3.5) abline(h=0,lty=2) points(c(1,3), c(log(1), log(3))) segments(1,log(1),3, log(3), lty=2) # plot log of mean of 1 and 3 points(2,log(2), pch=16, col=2, cex=.8) text(2, log(2), 'log of mean', pos=2, col=2, cex=.8) # plot mean of log 1 and log 3. points(2,(log(1)+log(3))/2, col=4, cex=.8, pch=16) text(2,(log(1)+log(3))/2, 'mean of log', col=4, cex=.8, pos=4)